library(vegan)
envdata<-read.csv('env.csv', header=TRUE, row.names=1)
#perform PCA using prcomp NOTE HERE: default for scale is FALSE, need to make scale=TRUE to use correlation matrix (correlation maxtrix NEEDS to be used if your variables are in different scales)
env.pca<-prcomp(envdata,scale=TRUE)
#go through and interpret the results from the PCA
summary(env.pca)
#note that this analysis presents SDs and NOT the variances (i.e. eigenvalues) - these need to be squared to become eigenvalues, so let's look at the the SD squared next
env.eig<-env.pca$sdev^2
#look at it
env.eig
#or you can calculate eigenvalues using pca.eigenval
pca.eigenval(env.pca)
#screeplot where each eigenvalue for each successive PC is depicted, overlay with a null distribution derived from the broken-stick model
screeplot(env.pca, bstick=TRUE)
#if you look at this plot, the first 2 PC are not higher than the broken stick model, but our notes say that this large amount of variance (46%) is still ecologically sig
#alternatively, test stat significance of first several PCA  using a Monte Carlo (randomization) test
ordi.monte(envdata,ord='pca',dim=5)
#here you get a frequence distribution of eigenvalues of randomized datasets compared to the real eigenvalues (so it's good if the real eigenvalue is lower than all the random ones) it also gives you p-values for each PC
#NOW we can examine the eigenvectors (i.e. variable loadings) only the first 5 are shown
env.pca$rotation
#note: here strong correlations (either pos or neg) indicate a high contribution toward the linear combination comprising each PCA
#or you can calculate eigenvectors using pca.eigenvec (cutoff is used to show only larger values, dim means only first 3 compontents are shown)
pca.eigenvec(env.pca,dim=5,digits=3,cutoff=.1)
#convert the eigenvector coefficients into simple correlation coefficients (i.e. Pearson product-moment correlations). The structure coefficients (loadings) are linear correlations between original vriable and pc scores
pca.structure(env.pca,envdata,dim=5,cutoff=0.4)
#NOW look at sample scores and ordination plots
env.scores<-env.pca$x
env.scores
#NOW visualize the ordination using biplot() or ordiplot()
#easy..
biplot(env.pca)
#more flexible and preferred approach, here choices are axes shown, type is the type of graph which may be points, text or none, display only sites or speces, the default for display is to show both, xlim and ylim (min,max) of the plot, then labels is optional text used for labels
ordiplot(env.pca,choices=c(1,2),type='text',display='sites',xlab="PC 1 (27%)", ylab="PC 2 (18%)")
#next add the eigenvectors (i.e. variable loadings)
#first add the arrows (multiply them by 5 so you can see them)
arrows(0,0,env.pca$rotation[,1]*5,env.pca$rotation[,2]*5,col='purple')
#then we added the text labels
text(env.pca$rotation[,1]*5.2,env.pca$rotation[,2]*5.2,row.names(env.pca$rotation))
#Exercise:
#use the env data to generate a covariance matrix based PCA
envcovar.pca<-prcomp(envdata,scale=FALSE)
#go through and interpret the results from the PCA
summary(envcovar.pca)
#note that this analysis presents SDs and NOT the variances (i.e. eigenvalues) - these need to be squared to become eigenvalues, so let's look at the the SD squared next
env.eig<-envcovar.pca$sdev^2
#look at it
env.eig
#or you can calculate eigenvalues using pca.eigenval
pca.eigenval(envcovar.pca)
#assess using broken stick
screeplot(envcovar.pca, bstick=TRUE)
#NOW we can examine the eigenvectors (i.e. variable loadings) only the first 5 are shown
envcovar.pca$rotation
pca.eigenvec(envcovar.pca,dim=5,digits=3,cutoff=.1)
#convert the eigenvector coefficients into simple correlation coefficients (i.e. Pearson product-moment correlations). The structure coefficients (loadings) are linear correlations between original vriable and pc scores
pca.structure(envcovar.pca,envdata,dim=5,cutoff=0.4)
#perform PCR on species abundance
speabu<-read.csv('abund.csv', header=TRUE,row.names=1)
speabu.pca<-prcomp(speabu[,-1],scale=FALSE)
summary(speabu.pca)
pca.eigenval(speabu.pca)
#assess using broken stick
screeplot(speabu.pca, bstick=TRUE)
biplot(speabu.pca)
ordiplot(speabu.pca,choices=c(1,2),type='text',display='sites',xlab="PC 1", ylab="PC 2")
#next add the eigenvectors (i.e. variable loadings)
#first add the arrows (multiply them by 5 so you can see them)
arrows(0,0,speabu.pca$rotation[,1]*15,env.pca$rotation[,2]*15,col='purple')
#then we added the text labels
text(env.pca$rotation[,1]*5.2,speabu.pca$rotation[,2]*5.2,row.names(env.pca$rotation))
#just wanted to go back and look at the biplot using the covariance.  it's quite obvious that elevation is really skewing this guy.  explains 99% of variance here and look like only driver.
biplot(envcovar.pca)
pca.eigenval(envcovar.pca)
pca.eigenvec(envcovar.pca)